
import sys
import pandas as pd
import numpy as np

from responsibly.fairness.interventions.threshold import find_thresholds

# Prefix for every input and output file. It is concatenated verbatim with
# the file names below, so a directory must be passed with its trailing
# path separator.
path = sys.argv[1]
print(path)

file_name_roc = path + "roc.csv"
file_name_prop = path + "proportions.csv"
file_name_br = path + "base_rates.csv"

df_roc_all = pd.read_csv(file_name_roc)
df_prop = pd.read_csv(file_name_prop)
df_br = pd.read_csv(file_name_br)


def _pct_for_group(df, group, source):
    """Return the "pct" value of the first row of *df* whose "group" is *group*.

    *source* is only used to make the error message point at the offending
    file. Raises ValueError when no row matches, instead of the bare
    IndexError that indexing an empty result would produce.
    """
    matches = df.loc[df["group"] == group, "pct"]
    if matches.empty:
        raise ValueError(
            "group {!r} not found in {}".format(group, source))
    return matches.values[0]


prop_majority_ = _pct_for_group(df_prop, "Majority", file_name_prop)
prop_minority_ = _pct_for_group(df_prop, "Minority", file_name_prop)
br_total_ = _pct_for_group(df_br, "Total", file_name_br)
br_majority_ = _pct_for_group(df_br, "Majority", file_name_br)
br_minority_ = _pct_for_group(df_br, "Minority", file_name_br)

# Filled in later with one ROC array per group label.
dict_rocs = {}

# Inputs handed to find_thresholds further down in the script.
dict_prop = {"Majority": prop_majority_,
             "Minority": prop_minority_}
base_rate_ = br_total_
s_base_rates = pd.Series([br_majority_, br_minority_],
                         index=["Majority", "Minority"])



# Value of the "d_Income_Level" column that selects each group's ROC rows.
# Majority is inserted into dict_rocs before Minority, as in the original
# script.
GROUP_LEVELS = (("Majority", 1), ("Minority", 0))

for VALUE_TYPE in ["value_riskscore", "value_xgb", "value_logistic"]:
    # The ROC rows depend only on the value type, so they are selected and
    # the per-group arrays built once here rather than once per ratio.
    # NOTE(review): assumes find_thresholds does not modify the arrays it is
    # given -- confirm against the responsibly implementation.
    df_roc = df_roc_all[df_roc_all["value_type"] == VALUE_TYPE]

    for group_label, level in GROUP_LEVELS:
        group_rows = df_roc[df_roc["d_Income_Level"] == level]
        # Rows of the array are, in order: fpr, tpr, threshold.
        dict_rocs[group_label] = np.array([group_rows["fpr"],
                                           group_rows["tpr"],
                                           group_rows["threshold"]])

    for LOSS_PROFIT_RATIO in range(1, 13):
        # Second-column entries are -r/(r+1) and 1/(r+1) for ratio r;
        # the first column is zero.
        COST_MATRIX = [[0, -LOSS_PROFIT_RATIO / (LOSS_PROFIT_RATIO + 1)],
                       [0, 1 / (LOSS_PROFIT_RATIO + 1)]]

        thresholds_data = find_thresholds(dict_rocs,
                                          dict_prop,
                                          base_rate_,
                                          s_base_rates,
                                          COST_MATRIX)

        single = thresholds_data["single"][0]
        min_cost = thresholds_data["min_cost"][0]
        # The row labelled "tpr" is read from the "fnr" entry of the result.
        # NOTE(review): presumably the equal-FNR (i.e. equal-TPR) thresholds;
        # confirm against the responsibly documentation.
        equal_fnr = thresholds_data["fnr"][0]

        # One row per threshold type; the single threshold applies to both
        # groups, so it is repeated in both value columns.
        df_thresholds = pd.DataFrame({
            "value_type": [VALUE_TYPE] * 3,
            "loss_profit_ratio": [LOSS_PROFIT_RATIO] * 3,
            "thresh_type": ["single", "min_cost", "tpr"],
            "value_majority": [single,
                               min_cost["Majority"],
                               equal_fnr["Majority"]],
            "value_minority": [single,
                               min_cost["Minority"],
                               equal_fnr["Minority"]],
        })

        file_name_out = (path + "thresh_" + VALUE_TYPE
                         + "_ltr" + str(LOSS_PROFIT_RATIO) + ".csv")

        df_thresholds.to_csv(file_name_out)
